{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# `vaex` @ PyData Budapest 2020\n",
    "\n",
    "## Machine Learning Example - \"Deployment\"\n",
    "\n",
    "To find out more details check out\n",
    "[ML impossible: Train 1 billion samples in 5 minutes on your laptop using Vaex and Scikit-Learn](https://towardsdatascience.com/ml-impossible-train-a-1-billion-sample-model-in-20-minutes-with-vaex-and-scikit-learn-on-your-9e2968e6f385).\n",
    "\n",
    "Running this notebooks requires `vaex==3.0.0`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-06-10T17:22:08.393837Z",
     "start_time": "2020-06-10T17:22:07.525172Z"
    }
   },
   "outputs": [],
   "source": [
    "import vaex\n",
    "\n",
    "import warnings; warnings.simplefilter('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the test data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-06-10T17:22:11.028148Z",
     "start_time": "2020-06-10T17:22:10.961348Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of samples in the training set: 151,762,675\n",
      "Number of samples in the test set:       26,781,649\n"
     ]
    }
   ],
   "source": [
    "df = vaex.open('/data/taxi/yellow_taxi_2012.hdf5')\n",
    "\n",
    "# Train / test split (by date)\n",
    "df_train, df_test = df.ml.train_test_split(test_size=0.15)\n",
    "\n",
    "print(f'Number of samples in the training set: {len(df_train):,}')\n",
    "print(f'Number of samples in the test set:       {len(df_test):,}')\n",
    "\n",
    "# Check if the lengths of the datasets match\n",
    "assert len(df) == len(df_test) + len(df_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Inspect the test set"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-06-10T17:22:14.934078Z",
     "start_time": "2020-06-10T17:22:14.866649Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th>#                                     </th><th>vendor_id  </th><th>pickup_datetime              </th><th>dropoff_datetime             </th><th>passenger_count  </th><th>payment_type  </th><th>trip_distance     </th><th>pickup_longitude  </th><th>pickup_latitude   </th><th>rate_code  </th><th>store_and_fwd_flag  </th><th>dropoff_longitude  </th><th>dropoff_latitude  </th><th>fare_amount       </th><th>surcharge  </th><th>mta_tax  </th><th>tip_amount       </th><th>tolls_amount     </th><th>total_amount      </th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td><i style='opacity: 0.6'>0</i>         </td><td>CMT        </td><td>2012-01-10 23:55:50.000000000</td><td>2012-01-11 00:03:39.000000000</td><td>1                </td><td>CRD           </td><td>1.7000000476837158</td><td>-73.99468994140625</td><td>40.725032806396484</td><td>1.0        </td><td>0.0                 </td><td>-73.9759521484375  </td><td>40.73078155517578 </td><td>6.900000095367432 </td><td>0.5        </td><td>0.5      </td><td>1.0              </td><td>0.0              </td><td>8.899999618530273 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>1</i>         </td><td>CMT        </td><td>2012-01-11 19:18:25.000000000</td><td>2012-01-11 19:26:10.000000000</td><td>1                </td><td>CSH           </td><td>1.100000023841858 </td><td>-73.98795318603516</td><td>40.75294876098633 </td><td>1.0        </td><td>0.0                 </td><td>-73.9945297241211  </td><td>40.76103973388672 </td><td>6.099999904632568 </td><td>1.0        </td><td>0.5      </td><td>0.0              </td><td>0.0              </td><td>7.599999904632568 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>2</i>         </td><td>CMT        </td><td>2012-01-11 19:19:19.000000000</td><td>2012-01-11 19:48:15.000000000</td><td>2                </td><td>CRD           </td><td>18.0              </td><td>-73.78309631347656</td><td>40.6485481262207  </td><td>2.0        </td><td>0.0                 </td><td>-73.99613189697266 </td><td>40.747623443603516</td><td>45.0              </td><td>0.0        </td><td>0.5      </td><td>10.0600004196167 </td><td>4.800000190734863</td><td>60.36000061035156 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>3</i>         </td><td>CMT        </td><td>2012-01-11 19:19:21.000000000</td><td>2012-01-11 19:27:00.000000000</td><td>1                </td><td>CRD           </td><td>1.7000000476837158</td><td>-73.96751403808594</td><td>40.758453369140625</td><td>1.0        </td><td>0.0                 </td><td>-73.95658111572266 </td><td>40.779903411865234</td><td>6.900000095367432 </td><td>1.0        </td><td>0.5      </td><td>1.0              </td><td>0.0              </td><td>9.399999618530273 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>4</i>         </td><td>CMT        </td><td>2012-01-11 14:38:15.000000000</td><td>2012-01-11 14:43:51.000000000</td><td>1                </td><td>CSH           </td><td>1.2000000476837158</td><td>-74.01131439208984</td><td>40.711448669433594</td><td>1.0        </td><td>0.0                 </td><td>-74.00286865234375 </td><td>40.72813034057617 </td><td>5.699999809265137 </td><td>0.0        </td><td>0.5      </td><td>0.0              </td><td>0.0              </td><td>6.199999809265137 </td></tr>\n",
       "<tr><td>...                                   </td><td>...        </td><td>...                          </td><td>...                          </td><td>...              </td><td>...           </td><td>...               </td><td>...               </td><td>...               </td><td>...        </td><td>...                 </td><td>...                </td><td>...               </td><td>...               </td><td>...        </td><td>...      </td><td>...              </td><td>...              </td><td>...               </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>26,781,644</i></td><td>VTS        </td><td>2012-02-11 23:28:00.000000000</td><td>2012-02-11 23:45:00.000000000</td><td>3                </td><td>CSH           </td><td>6.119999885559082 </td><td>-73.98193359375   </td><td>40.74324035644531 </td><td>1.0        </td><td>nan                 </td><td>-74.01116943359375 </td><td>40.71636962890625 </td><td>16.899999618530273</td><td>0.5        </td><td>0.5      </td><td>0.0              </td><td>0.0              </td><td>17.899999618530273</td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>26,781,645</i></td><td>VTS        </td><td>2012-02-11 22:46:00.000000000</td><td>2012-02-11 22:55:00.000000000</td><td>1                </td><td>CSH           </td><td>1.5499999523162842</td><td>-73.9814224243164 </td><td>40.67967987060547 </td><td>1.0        </td><td>nan                 </td><td>-73.96326446533203 </td><td>40.688507080078125</td><td>6.900000095367432 </td><td>0.5        </td><td>0.5      </td><td>0.0              </td><td>0.0              </td><td>7.900000095367432 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>26,781,646</i></td><td>VTS        </td><td>2012-02-11 23:22:00.000000000</td><td>2012-02-11 23:37:00.000000000</td><td>6                </td><td>CSH           </td><td>2.7899999618530273</td><td>-73.9787826538086 </td><td>40.77758026123047 </td><td>1.0        </td><td>nan                 </td><td>-74.00340270996094 </td><td>40.74978256225586 </td><td>10.5              </td><td>0.5        </td><td>0.5      </td><td>0.0              </td><td>0.0              </td><td>11.5              </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>26,781,647</i></td><td>VTS        </td><td>2012-02-11 23:26:00.000000000</td><td>2012-02-11 23:38:00.000000000</td><td>1                </td><td>CRD           </td><td>3.009999990463257 </td><td>-74.00403594970703</td><td>40.73289108276367 </td><td>1.0        </td><td>nan                 </td><td>-74.00830078125    </td><td>40.71181106567383 </td><td>9.699999809265137 </td><td>0.5        </td><td>0.5      </td><td>2.549999952316284</td><td>0.0              </td><td>13.25             </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>26,781,648</i></td><td>VTS        </td><td>2012-02-11 22:48:00.000000000</td><td>2012-02-11 22:54:00.000000000</td><td>1                </td><td>CSH           </td><td>1.1100000143051147</td><td>-73.9842529296875 </td><td>40.75496292114258 </td><td>1.0        </td><td>nan                 </td><td>-73.98495483398438 </td><td>40.74455261230469 </td><td>5.300000190734863 </td><td>0.5        </td><td>0.5      </td><td>0.0              </td><td>0.0              </td><td>6.300000190734863 </td></tr>\n",
       "</tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "#           vendor_id    pickup_datetime                dropoff_datetime               passenger_count    payment_type    trip_distance       pickup_longitude    pickup_latitude     rate_code    store_and_fwd_flag    dropoff_longitude    dropoff_latitude    fare_amount         surcharge    mta_tax    tip_amount         tolls_amount       total_amount\n",
       "0           CMT          2012-01-10 23:55:50.000000000  2012-01-11 00:03:39.000000000  1                  CRD             1.7000000476837158  -73.99468994140625  40.725032806396484  1.0          0.0                   -73.9759521484375    40.73078155517578   6.900000095367432   0.5          0.5        1.0                0.0                8.899999618530273\n",
       "1           CMT          2012-01-11 19:18:25.000000000  2012-01-11 19:26:10.000000000  1                  CSH             1.100000023841858   -73.98795318603516  40.75294876098633   1.0          0.0                   -73.9945297241211    40.76103973388672   6.099999904632568   1.0          0.5        0.0                0.0                7.599999904632568\n",
       "2           CMT          2012-01-11 19:19:19.000000000  2012-01-11 19:48:15.000000000  2                  CRD             18.0                -73.78309631347656  40.6485481262207    2.0          0.0                   -73.99613189697266   40.747623443603516  45.0                0.0          0.5        10.0600004196167   4.800000190734863  60.36000061035156\n",
       "3           CMT          2012-01-11 19:19:21.000000000  2012-01-11 19:27:00.000000000  1                  CRD             1.7000000476837158  -73.96751403808594  40.758453369140625  1.0          0.0                   -73.95658111572266   40.779903411865234  6.900000095367432   1.0          0.5        1.0                0.0                9.399999618530273\n",
       "4           CMT          2012-01-11 14:38:15.000000000  2012-01-11 14:43:51.000000000  1                  CSH             1.2000000476837158  -74.01131439208984  40.711448669433594  1.0          0.0                   -74.00286865234375   40.72813034057617   5.699999809265137   0.0          0.5        0.0                0.0                6.199999809265137\n",
       "...         ...          ...                            ...                            ...                ...             ...                 ...                 ...                 ...          ...                   ...                  ...                 ...                 ...          ...        ...                ...                ...\n",
       "26,781,644  VTS          2012-02-11 23:28:00.000000000  2012-02-11 23:45:00.000000000  3                  CSH             6.119999885559082   -73.98193359375     40.74324035644531   1.0          nan                   -74.01116943359375   40.71636962890625   16.899999618530273  0.5          0.5        0.0                0.0                17.899999618530273\n",
       "26,781,645  VTS          2012-02-11 22:46:00.000000000  2012-02-11 22:55:00.000000000  1                  CSH             1.5499999523162842  -73.9814224243164   40.67967987060547   1.0          nan                   -73.96326446533203   40.688507080078125  6.900000095367432   0.5          0.5        0.0                0.0                7.900000095367432\n",
       "26,781,646  VTS          2012-02-11 23:22:00.000000000  2012-02-11 23:37:00.000000000  6                  CSH             2.7899999618530273  -73.9787826538086   40.77758026123047   1.0          nan                   -74.00340270996094   40.74978256225586   10.5                0.5          0.5        0.0                0.0                11.5\n",
       "26,781,647  VTS          2012-02-11 23:26:00.000000000  2012-02-11 23:38:00.000000000  1                  CRD             3.009999990463257   -74.00403594970703  40.73289108276367   1.0          nan                   -74.00830078125      40.71181106567383   9.699999809265137   0.5          0.5        2.549999952316284  0.0                13.25\n",
       "26,781,648  VTS          2012-02-11 22:48:00.000000000  2012-02-11 22:54:00.000000000  1                  CSH             1.1100000143051147  -73.9842529296875   40.75496292114258   1.0          nan                   -73.98495483398438   40.74455261230469   5.300000190734863   0.5          0.5        0.0                0.0                6.300000190734863"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Apply the state to the test DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-06-10T17:22:22.860092Z",
     "start_time": "2020-06-10T17:22:22.275281Z"
    }
   },
   "outputs": [],
   "source": [
    "df_test.state_load('./taxi_ml_state.json')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-06-10T17:22:26.911176Z",
     "start_time": "2020-06-10T17:22:26.228616Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<thead>\n",
       "<tr><th>#                                     </th><th>vendor_id  </th><th>pickup_datetime              </th><th>dropoff_datetime             </th><th>passenger_count  </th><th>payment_type  </th><th>trip_distance     </th><th>pickup_longitude  </th><th>pickup_latitude   </th><th>rate_code  </th><th>store_and_fwd_flag  </th><th>dropoff_longitude  </th><th>dropoff_latitude  </th><th>fare_amount       </th><th>surcharge  </th><th>mta_tax  </th><th>tip_amount       </th><th>tolls_amount  </th><th>total_amount      </th><th>trip_duration_min  </th><th>trip_speed_mph    </th><th>pickup_time       </th><th>pickup_day  </th><th>pickup_is_weekend  </th><th>arc_distance       </th><th>direction_angle    </th><th>PCA_0                </th><th>PCA_1                 </th><th>PCA_2                </th><th>PCA_3                </th><th>pickup_time_x      </th><th>pickup_time_y       </th><th>pickup_day_x        </th><th>pickup_day_y       </th><th>direction_angle_x  </th><th>direction_angle_y   </th><th>standard_scaled_arc_distance  </th><th>predicted_duration_min  </th><th>pred_final        </th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
       "<tr><td><i style='opacity: 0.6'>0</i>         </td><td>CMT        </td><td>2012-01-10 23:55:50.000000000</td><td>2012-01-11 00:03:39.000000000</td><td>1                </td><td>CRD           </td><td>1.7000000476837158</td><td>-73.99468994140625</td><td>40.725032806396484</td><td>1.0        </td><td>0.0                 </td><td>-73.9759521484375  </td><td>40.73078155517578 </td><td>6.900000095367432 </td><td>0.5        </td><td>0.5      </td><td>1.0              </td><td>0.0           </td><td>8.899999618530273 </td><td>7.816666666666666  </td><td>13.049040877742808</td><td>23.916666666666668</td><td>1           </td><td>0                  </td><td>1.299300193786621  </td><td>72.94400024414062  </td><td>-0.029757998883724213</td><td>-0.004688636399805546 </td><td>-0.01604314148426056 </td><td>-0.014438532292842865</td><td>0.9997620270799091 </td><td>-0.0218148850345609 </td><td>0.6234898018587336  </td><td>0.7818314824680298 </td><td>0.2933062016963959 </td><td>0.9560185670852661  </td><td>0.14673005044460297           </td><td>10.17210051291147       </td><td>10.17210051291147 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>1</i>         </td><td>CMT        </td><td>2012-01-11 19:18:25.000000000</td><td>2012-01-11 19:26:10.000000000</td><td>1                </td><td>CSH           </td><td>1.100000023841858 </td><td>-73.98795318603516</td><td>40.75294876098633 </td><td>1.0        </td><td>0.0                 </td><td>-73.9945297241211  </td><td>40.76103973388672 </td><td>6.099999904632568 </td><td>1.0        </td><td>0.5      </td><td>0.0              </td><td>0.0           </td><td>7.599999904632568 </td><td>7.75               </td><td>8.51612921684019  </td><td>19.3              </td><td>2           </td><td>0                  </td><td>0.4798426032066345 </td><td>-39.10504150390625 </td><td>-0.003371396567672491</td><td>0.00664413021877408   </td><td>-0.001381831243634224</td><td>0.01789921149611473  </td><td>0.3338068592337709 </td><td>-0.9426414910921784 </td><td>-0.22252093395631434</td><td>0.9749279121818236 </td><td>0.775990903377533  </td><td>-0.6307440996170044 </td><td>-0.7633066773414612           </td><td>9.691096448206059       </td><td>9.691096448206059 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>2</i>         </td><td>CMT        </td><td>2012-01-11 19:19:21.000000000</td><td>2012-01-11 19:27:00.000000000</td><td>1                </td><td>CRD           </td><td>1.7000000476837158</td><td>-73.96751403808594</td><td>40.758453369140625</td><td>1.0        </td><td>0.0                 </td><td>-73.95658111572266 </td><td>40.779903411865234</td><td>6.900000095367432 </td><td>1.0        </td><td>0.5      </td><td>1.0              </td><td>0.0           </td><td>9.399999618530273 </td><td>7.65               </td><td>13.333333707323261</td><td>19.316666666666666</td><td>2           </td><td>0                  </td><td>0.8592353463172913 </td><td>27.00758934020996  </td><td>0.013282216154038906 </td><td>-0.0064217280596494675</td><td>0.03550034016370773  </td><td>-0.002972794696688652</td><td>0.3379167180033267 </td><td>-0.9411760152563707 </td><td>-0.22252093395631434</td><td>0.9749279121818236 </td><td>0.8909463882446289 </td><td>0.4541085362434387  </td><td>-0.3419775664806366           </td><td>9.76290952872819        </td><td>9.76290952872819  </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>3</i>         </td><td>CMT        </td><td>2012-01-11 14:38:15.000000000</td><td>2012-01-11 14:43:51.000000000</td><td>1                </td><td>CSH           </td><td>1.2000000476837158</td><td>-74.01131439208984</td><td>40.711448669433594</td><td>1.0        </td><td>0.0                 </td><td>-74.00286865234375 </td><td>40.72813034057617 </td><td>5.699999809265137 </td><td>0.0        </td><td>0.5      </td><td>0.0              </td><td>0.0           </td><td>6.199999809265137 </td><td>5.6                </td><td>12.857143368039813</td><td>14.633333333333333</td><td>2           </td><td>0                  </td><td>0.6643630266189575 </td><td>26.85257339477539  </td><td>-0.050594620406627655</td><td>0.00048208795487880707</td><td>-0.033315420150756836</td><td>0.006374814547598362 </td><td>-0.7716245833877202</td><td>-0.6360782202777636 </td><td>-0.22252093395631434</td><td>0.9749279121818236 </td><td>0.8921717405319214 </td><td>0.45169636607170105 </td><td>-0.558390200138092            </td><td>11.53348426067309       </td><td>11.53348426067309 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>4</i>         </td><td>VTS        </td><td>2012-01-09 19:14:00.000000000</td><td>2012-01-09 19:20:00.000000000</td><td>1                </td><td>CSH           </td><td>1.25              </td><td>-73.99333190917969</td><td>40.727718353271484</td><td>1.0        </td><td>nan                 </td><td>-73.9815673828125  </td><td>40.7392463684082  </td><td>6.099999904632568 </td><td>1.0        </td><td>0.5      </td><td>0.0              </td><td>0.0           </td><td>7.599999904632568 </td><td>6.0                </td><td>12.5              </td><td>19.233333333333334</td><td>0           </td><td>0                  </td><td>0.842030942440033  </td><td>45.581756591796875 </td><td>-0.026794197037816048</td><td>-0.004166812635958195 </td><td>-0.01217577699571848 </td><td>-0.005045588128268719</td><td>0.31730465640509226</td><td>-0.9483236552061993 </td><td>1.0                 </td><td>0.0                </td><td>0.6998907923698425 </td><td>0.714249849319458   </td><td>-0.3610836863517761           </td><td>9.972671716598873       </td><td>9.972671716598873 </td></tr>\n",
       "<tr><td>...                                   </td><td>...        </td><td>...                          </td><td>...                          </td><td>...              </td><td>...           </td><td>...               </td><td>...               </td><td>...               </td><td>...        </td><td>...                 </td><td>...                </td><td>...               </td><td>...               </td><td>...        </td><td>...      </td><td>...              </td><td>...           </td><td>...               </td><td>...                </td><td>...               </td><td>...               </td><td>...         </td><td>...                </td><td>...                </td><td>...                </td><td>...                  </td><td>...                   </td><td>...                  </td><td>...                  </td><td>...                </td><td>...                 </td><td>...                 </td><td>...                </td><td>...                </td><td>...                 </td><td>...                           </td><td>...                     </td><td>...               </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>21,229,613</i></td><td>VTS        </td><td>2012-02-11 23:28:00.000000000</td><td>2012-02-11 23:45:00.000000000</td><td>3                </td><td>CSH           </td><td>6.119999885559082 </td><td>-73.98193359375   </td><td>40.74324035644531 </td><td>1.0        </td><td>nan                 </td><td>-74.01116943359375 </td><td>40.71636962890625 </td><td>16.899999618530273</td><td>0.5        </td><td>0.5      </td><td>0.0              </td><td>0.0           </td><td>17.899999618530273</td><td>17.0               </td><td>21.59999959609088 </td><td>23.466666666666665</td><td>5           </td><td>1                  </td><td>2.0838654041290283 </td><td>-132.586181640625  </td><td>-0.007537414785474539</td><td>-0.003992264624685049 </td><td>-0.047707557678222656</td><td>0.0066642072051763535</td><td>0.9902680687415701 </td><td>-0.13917310096006674</td><td>-0.2225209339563146 </td><td>-0.9749279121818236</td><td>-0.6766984462738037</td><td>-0.7362602949142456 </td><td>1.0180175304412842            </td><td>13.41835160659671       </td><td>13.41835160659671 </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>21,229,614</i></td><td>VTS        </td><td>2012-02-11 22:46:00.000000000</td><td>2012-02-11 22:55:00.000000000</td><td>1                </td><td>CSH           </td><td>1.5499999523162842</td><td>-73.9814224243164 </td><td>40.67967987060547 </td><td>1.0        </td><td>nan                 </td><td>-73.96326446533203 </td><td>40.688507080078125</td><td>6.900000095367432 </td><td>0.5        </td><td>0.5      </td><td>0.0              </td><td>0.0           </td><td>7.900000095367432 </td><td>9.0                </td><td>10.333333015441895</td><td>22.766666666666666</td><td>5           </td><td>1                  </td><td>1.26585853099823   </td><td>64.07402038574219  </td><td>-0.05811910703778267 </td><td>-0.04248497262597084  </td><td>-0.04395797848701477 </td><td>-0.048627279698848724</td><td>0.9483236552061991 </td><td>-0.3173046564050927 </td><td>-0.2225209339563146 </td><td>-0.9749279121818236</td><td>0.4372095465660095 </td><td>0.8993596434593201  </td><td>0.10959190130233765           </td><td>10.303818906286105      </td><td>10.303818906286105</td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>21,229,615</i></td><td>VTS        </td><td>2012-02-11 23:22:00.000000000</td><td>2012-02-11 23:37:00.000000000</td><td>6                </td><td>CSH           </td><td>2.7899999618530273</td><td>-73.9787826538086 </td><td>40.77758026123047 </td><td>1.0        </td><td>nan                 </td><td>-74.00340270996094 </td><td>40.74978256225586 </td><td>10.5              </td><td>0.5        </td><td>0.5      </td><td>0.0              </td><td>0.0           </td><td>11.5              </td><td>15.0               </td><td>11.15999984741211 </td><td>23.366666666666667</td><td>5           </td><td>1                  </td><td>1.781661033630371  </td><td>-138.4691162109375 </td><td>0.021843839436769485 </td><td>0.014060418121516705  </td><td>-0.0156773142516613  </td><td>0.01894466206431389  </td><td>0.9862856015372314 </td><td>-0.16504760586067735</td><td>-0.2225209339563146 </td><td>-0.9749279121818236</td><td>-0.7485985159873962</td><td>-0.6630235910415649 </td><td>0.68240886926651              </td><td>12.045755182438363      </td><td>12.045755182438363</td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>21,229,616</i></td><td>VTS        </td><td>2012-02-11 23:26:00.000000000</td><td>2012-02-11 23:38:00.000000000</td><td>1                </td><td>CRD           </td><td>3.009999990463257 </td><td>-74.00403594970703</td><td>40.73289108276367 </td><td>1.0        </td><td>nan                 </td><td>-74.00830078125    </td><td>40.71181106567383 </td><td>9.699999809265137 </td><td>0.5        </td><td>0.5      </td><td>2.549999952316284</td><td>0.0           </td><td>13.25             </td><td>12.0               </td><td>15.049999952316284</td><td>23.433333333333334</td><td>5           </td><td>1                  </td><td>0.49788349866867065</td><td>-168.56251525878906</td><td>-0.02906632050871849 </td><td>0.007502423599362373  </td><td>-0.04987724870443344 </td><td>0.0017344895750284195</td><td>0.9890158633619168 </td><td>-0.14780941112961052</td><td>-0.2225209339563146 </td><td>-0.9749279121818236</td><td>-0.9801416397094727</td><td>-0.19829867780208588</td><td>-0.7432716488838196           </td><td>8.72141159892713        </td><td>8.72141159892713  </td></tr>\n",
       "<tr><td><i style='opacity: 0.6'>21,229,617</i></td><td>VTS        </td><td>2012-02-11 22:48:00.000000000</td><td>2012-02-11 22:54:00.000000000</td><td>1                </td><td>CSH           </td><td>1.1100000143051147</td><td>-73.9842529296875 </td><td>40.75496292114258 </td><td>1.0        </td><td>nan                 </td><td>-73.98495483398438 </td><td>40.74455261230469 </td><td>5.300000190734863 </td><td>0.5        </td><td>0.5      </td><td>0.0              </td><td>0.0           </td><td>6.300000190734863 </td><td>6.0                </td><td>11.100000143051147</td><td>22.8              </td><td>5           </td><td>1                  </td><td>0.2042892426252365 </td><td>-176.14273071289062</td><td>0.0004582637920975685</td><td>0.004888442344963551  </td><td>-0.009677266702055931</td><td>0.0007326905615627766</td><td>0.9510565162951538 </td><td>-0.3090169943749468 </td><td>-0.2225209339563146 </td><td>-0.9749279121818236</td><td>-0.9977347254753113</td><td>-0.06727108359336853</td><td>-1.069318413734436            </td><td>7.118652009123252       </td><td>7.118652009123252 </td></tr>\n",
       "</tbody>\n",
       "</table>"
      ],
      "text/plain": [
       "#           vendor_id    pickup_datetime                dropoff_datetime               passenger_count    payment_type    trip_distance       pickup_longitude    pickup_latitude     rate_code    store_and_fwd_flag    dropoff_longitude    dropoff_latitude    fare_amount         surcharge    mta_tax    tip_amount         tolls_amount    total_amount        trip_duration_min    trip_speed_mph      pickup_time         pickup_day    pickup_is_weekend    arc_distance         direction_angle      PCA_0                  PCA_1                   PCA_2                  PCA_3                  pickup_time_x        pickup_time_y         pickup_day_x          pickup_day_y         direction_angle_x    direction_angle_y     standard_scaled_arc_distance    predicted_duration_min    pred_final\n",
       "0           CMT          2012-01-10 23:55:50.000000000  2012-01-11 00:03:39.000000000  1                  CRD             1.7000000476837158  -73.99468994140625  40.725032806396484  1.0          0.0                   -73.9759521484375    40.73078155517578   6.900000095367432   0.5          0.5        1.0                0.0             8.899999618530273   7.816666666666666    13.049040877742808  23.916666666666668  1             0                    1.299300193786621    72.94400024414062    -0.029757998883724213  -0.004688636399805546   -0.01604314148426056   -0.014438532292842865  0.9997620270799091   -0.0218148850345609   0.6234898018587336    0.7818314824680298   0.2933062016963959   0.9560185670852661    0.14673005044460297             10.17210051291147         10.17210051291147\n",
       "1           CMT          2012-01-11 19:18:25.000000000  2012-01-11 19:26:10.000000000  1                  CSH             1.100000023841858   -73.98795318603516  40.75294876098633   1.0          0.0                   -73.9945297241211    40.76103973388672   6.099999904632568   1.0          0.5        0.0                0.0             7.599999904632568   7.75                 8.51612921684019    19.3                2             0                    0.4798426032066345   -39.10504150390625   -0.003371396567672491  0.00664413021877408     -0.001381831243634224  0.01789921149611473    0.3338068592337709   -0.9426414910921784   -0.22252093395631434  0.9749279121818236   0.775990903377533    -0.6307440996170044   -0.7633066773414612             9.691096448206059         9.691096448206059\n",
       "2           CMT          2012-01-11 19:19:21.000000000  2012-01-11 19:27:00.000000000  1                  CRD             1.7000000476837158  -73.96751403808594  40.758453369140625  1.0          0.0                   -73.95658111572266   40.779903411865234  6.900000095367432   1.0          0.5        1.0                0.0             9.399999618530273   7.65                 13.333333707323261  19.316666666666666  2             0                    0.8592353463172913   27.00758934020996    0.013282216154038906   -0.0064217280596494675  0.03550034016370773    -0.002972794696688652  0.3379167180033267   -0.9411760152563707   -0.22252093395631434  0.9749279121818236   0.8909463882446289   0.4541085362434387    -0.3419775664806366             9.76290952872819          9.76290952872819\n",
       "3           CMT          2012-01-11 14:38:15.000000000  2012-01-11 14:43:51.000000000  1                  CSH             1.2000000476837158  -74.01131439208984  40.711448669433594  1.0          0.0                   -74.00286865234375   40.72813034057617   5.699999809265137   0.0          0.5        0.0                0.0             6.199999809265137   5.6                  12.857143368039813  14.633333333333333  2             0                    0.6643630266189575   26.85257339477539    -0.050594620406627655  0.00048208795487880707  -0.033315420150756836  0.006374814547598362   -0.7716245833877202  -0.6360782202777636   -0.22252093395631434  0.9749279121818236   0.8921717405319214   0.45169636607170105   -0.558390200138092              11.53348426067309         11.53348426067309\n",
       "4           VTS          2012-01-09 19:14:00.000000000  2012-01-09 19:20:00.000000000  1                  CSH             1.25                -73.99333190917969  40.727718353271484  1.0          nan                   -73.9815673828125    40.7392463684082    6.099999904632568   1.0          0.5        0.0                0.0             7.599999904632568   6.0                  12.5                19.233333333333334  0             0                    0.842030942440033    45.581756591796875   -0.026794197037816048  -0.004166812635958195   -0.01217577699571848   -0.005045588128268719  0.31730465640509226  -0.9483236552061993   1.0                   0.0                  0.6998907923698425   0.714249849319458     -0.3610836863517761             9.972671716598873         9.972671716598873\n",
       "...         ...          ...                            ...                            ...                ...             ...                 ...                 ...                 ...          ...                   ...                  ...                 ...                 ...          ...        ...                ...             ...                 ...                  ...                 ...                 ...           ...                  ...                  ...                  ...                    ...                     ...                    ...                    ...                  ...                   ...                   ...                  ...                  ...                   ...                             ...                       ...\n",
       "21,229,613  VTS          2012-02-11 23:28:00.000000000  2012-02-11 23:45:00.000000000  3                  CSH             6.119999885559082   -73.98193359375     40.74324035644531   1.0          nan                   -74.01116943359375   40.71636962890625   16.899999618530273  0.5          0.5        0.0                0.0             17.899999618530273  17.0                 21.59999959609088   23.466666666666665  5             1                    2.0838654041290283   -132.586181640625    -0.007537414785474539  -0.003992264624685049   -0.047707557678222656  0.0066642072051763535  0.9902680687415701   -0.13917310096006674  -0.2225209339563146   -0.9749279121818236  -0.6766984462738037  -0.7362602949142456   1.0180175304412842              13.41835160659671         13.41835160659671\n",
       "21,229,614  VTS          2012-02-11 22:46:00.000000000  2012-02-11 22:55:00.000000000  1                  CSH             1.5499999523162842  -73.9814224243164   40.67967987060547   1.0          nan                   -73.96326446533203   40.688507080078125  6.900000095367432   0.5          0.5        0.0                0.0             7.900000095367432   9.0                  10.333333015441895  22.766666666666666  5             1                    1.26585853099823     64.07402038574219    -0.05811910703778267   -0.04248497262597084    -0.04395797848701477   -0.048627279698848724  0.9483236552061991   -0.3173046564050927   -0.2225209339563146   -0.9749279121818236  0.4372095465660095   0.8993596434593201    0.10959190130233765             10.303818906286105        10.303818906286105\n",
       "21,229,615  VTS          2012-02-11 23:22:00.000000000  2012-02-11 23:37:00.000000000  6                  CSH             2.7899999618530273  -73.9787826538086   40.77758026123047   1.0          nan                   -74.00340270996094   40.74978256225586   10.5                0.5          0.5        0.0                0.0             11.5                15.0                 11.15999984741211   23.366666666666667  5             1                    1.781661033630371    -138.4691162109375   0.021843839436769485   0.014060418121516705    -0.0156773142516613    0.01894466206431389    0.9862856015372314   -0.16504760586067735  -0.2225209339563146   -0.9749279121818236  -0.7485985159873962  -0.6630235910415649   0.68240886926651                12.045755182438363        12.045755182438363\n",
       "21,229,616  VTS          2012-02-11 23:26:00.000000000  2012-02-11 23:38:00.000000000  1                  CRD             3.009999990463257   -74.00403594970703  40.73289108276367   1.0          nan                   -74.00830078125      40.71181106567383   9.699999809265137   0.5          0.5        2.549999952316284  0.0             13.25               12.0                 15.049999952316284  23.433333333333334  5             1                    0.49788349866867065  -168.56251525878906  -0.02906632050871849   0.007502423599362373    -0.04987724870443344   0.0017344895750284195  0.9890158633619168   -0.14780941112961052  -0.2225209339563146   -0.9749279121818236  -0.9801416397094727  -0.19829867780208588  -0.7432716488838196             8.72141159892713          8.72141159892713\n",
       "21,229,617  VTS          2012-02-11 22:48:00.000000000  2012-02-11 22:54:00.000000000  1                  CSH             1.1100000143051147  -73.9842529296875   40.75496292114258   1.0          nan                   -73.98495483398438   40.74455261230469   5.300000190734863   0.5          0.5        0.0                0.0             6.300000190734863   6.0                  11.100000143051147  22.8                5             1                    0.2042892426252365   -176.14273071289062  0.0004582637920975685  0.004888442344963551    -0.009677266702055931  0.0007326905615627766  0.9510565162951538   -0.3090169943749468   -0.2225209339563146   -0.9749279121818236  -0.9977347254753113  -0.06727108359336853  -1.069318413734436              7.118652009123252         7.118652009123252"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-06-10T17:22:40.218782Z",
     "start_time": "2020-06-10T17:22:38.467139Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "14aa8d697bff4d319560102076bc6a1e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "array([-1.34434498, 62.22383593])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Smallest and largest value of the `predicted_duration_min` column of df_test,\n",
    "# computed with a progress widget. Note the stored minimum is below zero.\n",
    "df_test['predicted_duration_min'].minmax(progress='widget')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-06-10T17:22:43.498924Z",
     "start_time": "2020-06-10T17:22:40.219792Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "274150b970a04f7ca4c05de4cb104a84",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=1.0), Label(value='In progress...')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "array([ 3., 25.])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Smallest and largest value of the `pred_final` column of df_test, computed\n",
    "# with a progress widget. The stored range is exactly [3, 25].\n",
    "# NOTE(review): presumably a bounded version of the raw prediction - confirm\n",
    "# against the pipeline that defines `pred_final`.\n",
    "df_test['pred_final'].minmax(progress='widget')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-06-03T15:20:50.019022Z",
     "start_time": "2020-06-03T15:20:49.882430Z"
    }
   },
   "source": [
    "# Thank you!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
